import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import plotly.express as px
# Load the India COVID-19 CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='covid_19_india.csv')
df.head(n=5)
| Sno | Date | Time | State/UnionTerritory | ConfirmedIndianNational | ConfirmedForeignNational | Cured | Deaths | Confirmed | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2020-01-30 | 6:00 PM | Kerala | 1 | 0 | 0 | 0 | 1 |
| 1 | 2 | 2020-01-31 | 6:00 PM | Kerala | 1 | 0 | 0 | 0 | 1 |
| 2 | 3 | 2020-02-01 | 6:00 PM | Kerala | 2 | 0 | 0 | 0 | 2 |
| 3 | 4 | 2020-02-02 | 6:00 PM | Kerala | 3 | 0 | 0 | 0 | 3 |
| 4 | 5 | 2020-02-03 | 6:00 PM | Kerala | 3 | 0 | 0 | 0 | 3 |
# Keep only the columns used in the rest of the analysis and give them
# short lowercase names in a single step, then report the resulting shape.
df = df[['Date', 'State/UnionTerritory', 'Cured', 'Deaths', 'Confirmed']].rename(
    columns={
        'Date': 'date',
        'State/UnionTerritory': 'state',
        'Cured': 'cured',
        'Deaths': 'deaths',
        'Confirmed': 'confirmed',
    }
)
df.shape
(15554, 5)
# Print the column dtypes, non-null counts and memory usage of the trimmed frame.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 15554 entries, 0 to 15553 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 15554 non-null object 1 state 15554 non-null object 2 cured 15554 non-null int64 3 deaths 15554 non-null int64 4 confirmed 15554 non-null int64 dtypes: int64(3), object(2) memory usage: 607.7+ KB
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()
| cured | deaths | confirmed | |
|---|---|---|---|
| count | 1.555400e+04 | 15554.000000 | 1.555400e+04 |
| mean | 1.898184e+05 | 2898.898804 | 2.110835e+05 |
| std | 4.061648e+05 | 7637.201754 | 4.542665e+05 |
| min | 0.000000e+00 | 0.000000 | 0.000000e+00 |
| 25% | 1.950250e+03 | 14.000000 | 3.177000e+03 |
| 50% | 2.141550e+04 | 374.000000 | 2.874900e+04 |
| 75% | 2.151482e+05 | 2527.000000 | 2.387830e+05 |
| max | 5.395370e+06 | 95344.000000 | 5.746892e+06 |
# Show the column names, then count the missing values in each column.
print(df.columns)
# Call isnull() on the frame itself: pd.isnull(df.columns) only tests the
# column *labels*, which are never null, so it cannot reveal missing data.
# Summing the boolean mask gives the number of missing entries per column.
missing_values = df.isnull().sum()
print(missing_values)
Index(['date', 'state', 'cured', 'deaths', 'confirmed'], dtype='object') [False False False False False]
# Frequency of each distinct cumulative death count across all rows.
df['deaths'].value_counts()
0 1718
1 527
2 417
3 195
4 174
...
3685 1
23695 1
3677 1
24151 1
10297 1
Name: deaths, Length: 5287, dtype: int64
# Frequency of each distinct cumulative confirmed-case count across all rows.
df['confirmed'].value_counts()
1 263
2 117
7 92
3 88
33 63
...
92536 1
905591 1
33141 1
750517 1
237552 1
Name: confirmed, Length: 12522, dtype: int64
# Per-state snapshot of the figures reported on one fixed date.
# NOTE(review): despite its name, `today` is a hard-coded date; the dataset
# contains rows later than this (e.g. 2021-06-01), so it is not the latest day.
today = df.loc[df['date'] == '2021-05-19']
today
| date | state | cured | deaths | confirmed | |
|---|---|---|---|---|---|
| 15050 | 2021-05-19 | Andaman and Nicobar Islands | 6359 | 92 | 6674 |
| 15051 | 2021-05-19 | Andhra Pradesh | 1254291 | 9580 | 1475372 |
| 15052 | 2021-05-19 | Arunachal Pradesh | 19977 | 88 | 22462 |
| 15053 | 2021-05-19 | Assam | 290774 | 2344 | 340858 |
| 15054 | 2021-05-19 | Bihar | 595377 | 4039 | 664115 |
| 15055 | 2021-05-19 | Chandigarh | 48831 | 647 | 56513 |
| 15056 | 2021-05-19 | Chhattisgarh | 823113 | 12036 | 925531 |
| 15057 | 2021-05-19 | Dadra and Nagar Haveli and Daman and Diu | 8944 | 4 | 9652 |
| 15058 | 2021-05-19 | Delhi | 1329899 | 22111 | 1402873 |
| 15059 | 2021-05-19 | Goa | 112633 | 2197 | 138776 |
| 15060 | 2021-05-19 | Gujarat | 660489 | 9269 | 766201 |
| 15061 | 2021-05-19 | Haryana | 626852 | 6923 | 709689 |
| 15062 | 2021-05-19 | Himachal Pradesh | 129330 | 2460 | 166678 |
| 15063 | 2021-05-19 | Jammu and Kashmir | 197701 | 3293 | 251919 |
| 15064 | 2021-05-19 | Jharkhand | 284805 | 4601 | 320934 |
| 15065 | 2021-05-19 | Karnataka | 1674487 | 22838 | 2272374 |
| 15066 | 2021-05-19 | Kerala | 1846105 | 6612 | 2200706 |
| 15067 | 2021-05-19 | Ladakh | 15031 | 170 | 16784 |
| 15068 | 2021-05-19 | Lakshadweep | 3915 | 15 | 5212 |
| 15069 | 2021-05-19 | Madhya Pradesh | 652612 | 7139 | 742718 |
| 15070 | 2021-05-19 | Maharashtra | 4927480 | 83777 | 5433506 |
| 15071 | 2021-05-19 | Manipur | 33466 | 612 | 40683 |
| 15072 | 2021-05-19 | Meghalaya | 19185 | 355 | 24872 |
| 15073 | 2021-05-19 | Mizoram | 7094 | 29 | 9252 |
| 15074 | 2021-05-19 | Nagaland | 14079 | 228 | 18714 |
| 15075 | 2021-05-19 | Odisha | 536595 | 2357 | 633302 |
| 15076 | 2021-05-19 | Puducherry | 69060 | 1212 | 87749 |
| 15077 | 2021-05-19 | Punjab | 427058 | 12317 | 511652 |
| 15078 | 2021-05-19 | Rajasthan | 713129 | 7080 | 879664 |
| 15079 | 2021-05-19 | Sikkim | 8427 | 212 | 11689 |
| 15080 | 2021-05-19 | Tamil Nadu | 1403052 | 18369 | 1664350 |
| 15081 | 2021-05-19 | Telangana | 485644 | 3012 | 536766 |
| 15082 | 2021-05-19 | Tripura | 36402 | 450 | 42776 |
| 15083 | 2021-05-19 | Uttarakhand | 214426 | 5132 | 295790 |
| 15084 | 2021-05-19 | Uttar Pradesh | 1483249 | 18072 | 1637663 |
| 15085 | 2021-05-19 | West Bengal | 1026492 | 13576 | 1171861 |
# Rank the snapshot rows from the most to the fewest confirmed cases.
max_confirmed_cases = today.sort_values("confirmed", ascending=False)
max_confirmed_cases
| date | state | cured | deaths | confirmed | |
|---|---|---|---|---|---|
| 15070 | 2021-05-19 | Maharashtra | 4927480 | 83777 | 5433506 |
| 15065 | 2021-05-19 | Karnataka | 1674487 | 22838 | 2272374 |
| 15066 | 2021-05-19 | Kerala | 1846105 | 6612 | 2200706 |
| 15080 | 2021-05-19 | Tamil Nadu | 1403052 | 18369 | 1664350 |
| 15084 | 2021-05-19 | Uttar Pradesh | 1483249 | 18072 | 1637663 |
| 15051 | 2021-05-19 | Andhra Pradesh | 1254291 | 9580 | 1475372 |
| 15058 | 2021-05-19 | Delhi | 1329899 | 22111 | 1402873 |
| 15085 | 2021-05-19 | West Bengal | 1026492 | 13576 | 1171861 |
| 15056 | 2021-05-19 | Chhattisgarh | 823113 | 12036 | 925531 |
| 15078 | 2021-05-19 | Rajasthan | 713129 | 7080 | 879664 |
| 15060 | 2021-05-19 | Gujarat | 660489 | 9269 | 766201 |
| 15069 | 2021-05-19 | Madhya Pradesh | 652612 | 7139 | 742718 |
| 15061 | 2021-05-19 | Haryana | 626852 | 6923 | 709689 |
| 15054 | 2021-05-19 | Bihar | 595377 | 4039 | 664115 |
| 15075 | 2021-05-19 | Odisha | 536595 | 2357 | 633302 |
| 15081 | 2021-05-19 | Telangana | 485644 | 3012 | 536766 |
| 15077 | 2021-05-19 | Punjab | 427058 | 12317 | 511652 |
| 15053 | 2021-05-19 | Assam | 290774 | 2344 | 340858 |
| 15064 | 2021-05-19 | Jharkhand | 284805 | 4601 | 320934 |
| 15083 | 2021-05-19 | Uttarakhand | 214426 | 5132 | 295790 |
| 15063 | 2021-05-19 | Jammu and Kashmir | 197701 | 3293 | 251919 |
| 15062 | 2021-05-19 | Himachal Pradesh | 129330 | 2460 | 166678 |
| 15059 | 2021-05-19 | Goa | 112633 | 2197 | 138776 |
| 15076 | 2021-05-19 | Puducherry | 69060 | 1212 | 87749 |
| 15055 | 2021-05-19 | Chandigarh | 48831 | 647 | 56513 |
| 15082 | 2021-05-19 | Tripura | 36402 | 450 | 42776 |
| 15071 | 2021-05-19 | Manipur | 33466 | 612 | 40683 |
| 15072 | 2021-05-19 | Meghalaya | 19185 | 355 | 24872 |
| 15052 | 2021-05-19 | Arunachal Pradesh | 19977 | 88 | 22462 |
| 15074 | 2021-05-19 | Nagaland | 14079 | 228 | 18714 |
| 15067 | 2021-05-19 | Ladakh | 15031 | 170 | 16784 |
| 15079 | 2021-05-19 | Sikkim | 8427 | 212 | 11689 |
| 15057 | 2021-05-19 | Dadra and Nagar Haveli and Daman and Diu | 8944 | 4 | 9652 |
| 15073 | 2021-05-19 | Mizoram | 7094 | 29 | 9252 |
| 15050 | 2021-05-19 | Andaman and Nicobar Islands | 6359 | 92 | 6674 |
| 15068 | 2021-05-19 | Lakshadweep | 3915 | 15 | 5212 |
# Take the five states with the most confirmed cases (frame is already sorted).
top_states_confirmed = max_confirmed_cases.head(5)
# Bar plot of confirmed cases for those five states, one colour per state.
sns.set(rc={'figure.figsize': (20, 10)})
# `hue` repeats the x variable purely for colouring. dodge=False keeps every
# bar full-width and centred on its tick; with dodging enabled, seaborn
# releases before 0.13 reserve a slot per hue level and draw thin, offset bars.
sns.barplot(
    x="state",
    y="confirmed",
    data=top_states_confirmed,
    hue="state",
    dodge=False,
)
plt.show()
# Rank the snapshot rows from the most to the fewest recorded deaths.
max_death_cases = today.sort_values("deaths", ascending=False)
max_death_cases
| date | state | cured | deaths | confirmed | |
|---|---|---|---|---|---|
| 15070 | 2021-05-19 | Maharashtra | 4927480 | 83777 | 5433506 |
| 15065 | 2021-05-19 | Karnataka | 1674487 | 22838 | 2272374 |
| 15058 | 2021-05-19 | Delhi | 1329899 | 22111 | 1402873 |
| 15080 | 2021-05-19 | Tamil Nadu | 1403052 | 18369 | 1664350 |
| 15084 | 2021-05-19 | Uttar Pradesh | 1483249 | 18072 | 1637663 |
| 15085 | 2021-05-19 | West Bengal | 1026492 | 13576 | 1171861 |
| 15077 | 2021-05-19 | Punjab | 427058 | 12317 | 511652 |
| 15056 | 2021-05-19 | Chhattisgarh | 823113 | 12036 | 925531 |
| 15051 | 2021-05-19 | Andhra Pradesh | 1254291 | 9580 | 1475372 |
| 15060 | 2021-05-19 | Gujarat | 660489 | 9269 | 766201 |
| 15069 | 2021-05-19 | Madhya Pradesh | 652612 | 7139 | 742718 |
| 15078 | 2021-05-19 | Rajasthan | 713129 | 7080 | 879664 |
| 15061 | 2021-05-19 | Haryana | 626852 | 6923 | 709689 |
| 15066 | 2021-05-19 | Kerala | 1846105 | 6612 | 2200706 |
| 15083 | 2021-05-19 | Uttarakhand | 214426 | 5132 | 295790 |
| 15064 | 2021-05-19 | Jharkhand | 284805 | 4601 | 320934 |
| 15054 | 2021-05-19 | Bihar | 595377 | 4039 | 664115 |
| 15063 | 2021-05-19 | Jammu and Kashmir | 197701 | 3293 | 251919 |
| 15081 | 2021-05-19 | Telangana | 485644 | 3012 | 536766 |
| 15062 | 2021-05-19 | Himachal Pradesh | 129330 | 2460 | 166678 |
| 15075 | 2021-05-19 | Odisha | 536595 | 2357 | 633302 |
| 15053 | 2021-05-19 | Assam | 290774 | 2344 | 340858 |
| 15059 | 2021-05-19 | Goa | 112633 | 2197 | 138776 |
| 15076 | 2021-05-19 | Puducherry | 69060 | 1212 | 87749 |
| 15055 | 2021-05-19 | Chandigarh | 48831 | 647 | 56513 |
| 15071 | 2021-05-19 | Manipur | 33466 | 612 | 40683 |
| 15082 | 2021-05-19 | Tripura | 36402 | 450 | 42776 |
| 15072 | 2021-05-19 | Meghalaya | 19185 | 355 | 24872 |
| 15074 | 2021-05-19 | Nagaland | 14079 | 228 | 18714 |
| 15079 | 2021-05-19 | Sikkim | 8427 | 212 | 11689 |
| 15067 | 2021-05-19 | Ladakh | 15031 | 170 | 16784 |
| 15050 | 2021-05-19 | Andaman and Nicobar Islands | 6359 | 92 | 6674 |
| 15052 | 2021-05-19 | Arunachal Pradesh | 19977 | 88 | 22462 |
| 15073 | 2021-05-19 | Mizoram | 7094 | 29 | 9252 |
| 15068 | 2021-05-19 | Lakshadweep | 3915 | 15 | 5212 |
| 15057 | 2021-05-19 | Dadra and Nagar Haveli and Daman and Diu | 8944 | 4 | 9652 |
# Take the five states with the most deaths (frame is already sorted).
top_states_death = max_death_cases.head(5)
# Interactive Plotly bar chart of deaths for those five states.
fig = px.bar(top_states_death, x="state", y="deaths")
fig.show()
# Static seaborn bar plot of the same data, one colour per state.
sns.set(rc={'figure.figsize': (15, 10)})
# `hue` repeats the x variable purely for colouring. dodge=False keeps every
# bar full-width and centred on its tick; with dodging enabled, seaborn
# releases before 0.13 reserve a slot per hue level and draw thin, offset bars.
sns.barplot(
    x="state",
    y="deaths",
    data=top_states_death,
    hue="state",
    dodge=False,
)
plt.show()
# Rajasthan: every row of the dataset reported for this state.
RJ = df.loc[df['state'] == 'Rajasthan']
RJ
| date | state | cured | deaths | confirmed | |
|---|---|---|---|---|---|
| 36 | 2020-03-03 | Rajasthan | 0 | 0 | 1 |
| 44 | 2020-03-04 | Rajasthan | 0 | 0 | 15 |
| 48 | 2020-03-05 | Rajasthan | 0 | 0 | 15 |
| 54 | 2020-03-06 | Rajasthan | 0 | 0 | 15 |
| 64 | 2020-03-07 | Rajasthan | 0 | 0 | 15 |
| ... | ... | ... | ... | ... | ... |
| 15402 | 2021-05-28 | Rajasthan | 851998 | 8103 | 931200 |
| 15438 | 2021-05-29 | Rajasthan | 863175 | 8181 | 933848 |
| 15474 | 2021-05-30 | Rajasthan | 871283 | 8251 | 936162 |
| 15510 | 2021-05-31 | Rajasthan | 880919 | 8317 | 938460 |
| 15546 | 2021-06-01 | Rajasthan | 888919 | 8385 | 939958 |
456 rows × 5 columns
# Visualizing confirmed cases in Rajasthan.
# The `date` column holds strings, which matplotlib would treat as categories
# (one tick label per day). Parse it on a temporary copy so the x axis becomes
# a real time axis; RJ itself is left untouched.
sns.lineplot(
    x="date",
    y="confirmed",
    data=RJ.assign(date=pd.to_datetime(RJ["date"])),
    color="r",
)
plt.show()
# Visualizing death cases in Rajasthan (same date handling as above).
sns.set(rc={'figure.figsize': (15, 10)})
sns.lineplot(
    x="date",
    y="deaths",
    data=RJ.assign(date=pd.to_datetime(RJ["date"])),
    color="r",
)
plt.show()
# Installs plotly into the current environment.
# NOTE(review): this bare `pip ...` form is not Python syntax; it presumably
# relies on IPython/Jupyter automagic and will fail if the file is run as a
# plain script. plotly.express is already imported at the top of the file.
pip install plotly
Requirement already satisfied: plotly in c:\users\admin\anaconda3\lib\site-packages (4.14.3) Requirement already satisfied: six in c:\users\admin\anaconda3\lib\site-packages (from plotly) (1.15.0) Requirement already satisfied: retrying>=1.3.3 in c:\users\admin\anaconda3\lib\site-packages (from plotly) (1.3.3) Note: you may need to restart the kernel to use updated packages.
# Interactive Plotly charts for Rajasthan, shown in order: confirmed cases
# as a scatter, deaths as a scatter, then deaths as a bar chart.
for make_chart, column in (
    (px.scatter, "confirmed"),
    (px.scatter, "deaths"),
    (px.bar, "deaths"),
):
    fig = make_chart(RJ, x="date", y=column)
    fig.show()
# Maharashtra: every row of the dataset reported for this state.
MH = df.loc[df['state'] == 'Maharashtra']
MH
| date | state | cured | deaths | confirmed | |
|---|---|---|---|---|---|
| 76 | 2020-03-09 | Maharashtra | 0 | 0 | 2 |
| 91 | 2020-03-10 | Maharashtra | 0 | 0 | 5 |
| 97 | 2020-03-11 | Maharashtra | 0 | 0 | 2 |
| 120 | 2020-03-12 | Maharashtra | 0 | 0 | 11 |
| 133 | 2020-03-13 | Maharashtra | 0 | 0 | 14 |
| ... | ... | ... | ... | ... | ... |
| 15394 | 2021-05-28 | Maharashtra | 5276203 | 92225 | 5672180 |
| 15430 | 2021-05-29 | Maharashtra | 5307874 | 93198 | 5692920 |
| 15466 | 2021-05-30 | Maharashtra | 5339838 | 94030 | 5713215 |
| 15502 | 2021-05-31 | Maharashtra | 5362370 | 94844 | 5731815 |
| 15538 | 2021-06-01 | Maharashtra | 5395370 | 95344 | 5746892 |
450 rows × 5 columns
# Visualizing confirmed cases in Maharashtra.
# The `date` column holds strings, which matplotlib would treat as categories
# (one tick label per day). Parse it on a temporary copy so the x axis becomes
# a real time axis; MH itself is left untouched.
sns.lineplot(
    x="date",
    y="confirmed",
    data=MH.assign(date=pd.to_datetime(MH["date"])),
    color="r",
)
plt.show()
# Visualizing death cases in Maharashtra (same date handling as above).
sns.set(rc={'figure.figsize': (15, 10)})
sns.lineplot(
    x="date",
    y="deaths",
    data=MH.assign(date=pd.to_datetime(MH["date"])),
    color="r",
)
plt.show()
# Interactive Plotly versions: confirmed scatter, deaths scatter, deaths bar.
fig = px.scatter(MH, x="date", y="confirmed")
fig.show()
fig = px.scatter(MH, x="date", y="deaths")
fig.show()
fig = px.bar(MH, x='date', y='deaths')
fig.show()
# Kerala: every row of the dataset reported for this state.
KL = df.loc[df['state'] == 'Kerala']
KL
| date | state | cured | deaths | confirmed | |
|---|---|---|---|---|---|
| 0 | 2020-01-30 | Kerala | 0 | 0 | 1 |
| 1 | 2020-01-31 | Kerala | 0 | 0 | 1 |
| 2 | 2020-02-01 | Kerala | 0 | 0 | 2 |
| 3 | 2020-02-02 | Kerala | 0 | 0 | 3 |
| 4 | 2020-02-03 | Kerala | 0 | 0 | 3 |
| ... | ... | ... | ... | ... | ... |
| 15390 | 2021-05-28 | Kerala | 2198135 | 8063 | 2448554 |
| 15426 | 2021-05-29 | Kerala | 2224405 | 8257 | 2470872 |
| 15462 | 2021-05-30 | Kerala | 2252505 | 8455 | 2494385 |
| 15498 | 2021-05-31 | Kerala | 2281518 | 8641 | 2514279 |
| 15534 | 2021-06-01 | Kerala | 2310385 | 8815 | 2526579 |
489 rows × 5 columns
# Visualizing confirmed cases in Kerala.
# The `date` column holds strings, which matplotlib would treat as categories
# (one tick label per day). Parse it on a temporary copy so the x axis becomes
# a real time axis; KL itself is left untouched.
sns.lineplot(
    x="date",
    y="confirmed",
    data=KL.assign(date=pd.to_datetime(KL["date"])),
    color="r",
)
plt.show()
# Interactive Plotly scatter of confirmed cases.
fig = px.scatter(KL, x="date", y="confirmed")
fig.show()
# Visualizing death cases in Kerala (same date handling as above).
sns.set(rc={'figure.figsize': (15, 10)})
sns.lineplot(
    x="date",
    y="deaths",
    data=KL.assign(date=pd.to_datetime(KL["date"])),
    color="r",
)
plt.show()
# Interactive Plotly versions of the death series: scatter, then bar.
fig = px.scatter(KL, x="date", y="deaths")
fig.show()
fig = px.bar(KL, x='date', y='deaths')
fig.show()